cd "C:\Users\remij\Desktop\Water and conflict SSA 11282022\Data\DepthToGroundwater MacDonald\DepthToGroundwater2021\Finalcombined_map"

*****************************************************************
*** DEPTH TO GROUNDWATER FROM MACDONALD ET AL - MEAN AND GINI ***
*****************************************************************

* Data from Macdonald et al at the pixel level (appears to be 0.05*0.05 degree) 

*** SHARE OF EACH DEPTH CATEGORY IN EACH CELL ***

clear
import delimited "depth_2021_points.csv"
keep value gid 
count
* MacDonald = 1,002,871 pixels
codebook gid
* MacDonald = 10,332 cells 
tab value, m
* VS 0-7 - 254,972 obs
* S 7-25 - 167,426
* SM 25-50 - 164,138
* M 50-100 - 121,364
* D 100-250 - 144,664
* VD > 250 - 150,307 obs

* 2012 VERSION
* DTWAFRICA_ |      Freq.     Percent        Cum.
*------------+-----------------------------------
*          D |    144,664       14.42       14.42
*          M |    121,364       12.10       26.53
*          S |    175,835       17.53       44.06
*         SM |    164,150       16.37       60.43
*         VD |    150,315       14.99       75.42
*         VS |    246,543       24.58      100.00
*------------+-----------------------------------
*      Total |  1,002,871      100.00

gen dtwafrica_ = ""
replace dtwafrica_ = "VS" if value == 1
replace dtwafrica_ = "S" if value == 7
replace dtwafrica_ = "SM" if value == 25
replace dtwafrica_ = "M" if value == 50
replace dtwafrica_ = "D" if value == 100
replace dtwafrica_ = "VD" if value == 250
tab dtwafrica_, m
save temp, replace
foreach X in VS S SM M D VD {
gen share`X' = (dtw == "`X'")
}
collapse (sum) share*, by(gid)
egen sum = rsum(share*)
tab sum, m
* most cells have 9732 groundwater depth pixels
foreach X in VS S SM M D VD {
replace share`X' = share`X'/sum
}
* gd for groundwater depth
ren shareVS gdshare_0_7
label var gdshare_0_7 "Share of 0-7m groundwater depth category in the cell (MacDonald)"
ren shareS gdshare_7_25
label var gdshare_7_25 "Share of 7-25m groundwater depth category in the cell (MacDonald)"
ren shareSM gdshare_25_50
label var gdshare_25_50 "Share of 25-50m groundwater depth category in the cell (MacDonald)"
ren shareM gdshare_50_100
label var gdshare_50_100 "Share of 50-100m groundwater depth category in the cell (MacDonald)"
ren shareD gdshare_100_250
label var gdshare_100_250 "Share of 100-250m groundwater depth category in the cell (MacDonald)"
ren shareVD gdshare_250
label var gdshare_250 "Share of 250+m groundwater depth category in the cell (MacDonald)"
drop sum
gen gdshare_0_25 = gdshare_0_7+gdshare_7_25
label var gdshare_0_25 "Share of 0-25m groundwater depth category in the cell (MacDonald)"
gen gdshare_0_50 = gdshare_0_7+gdshare_7_25+gdshare_25_50
label var gdshare_0_50 "Share of 0-50m groundwater depth category in the cell (MacDonald)"
gen gdshare_0_100 = gdshare_0_7+gdshare_7_25+gdshare_25_50+gdshare_50_100
label var gdshare_0_100 "Share of 0-100m groundwater depth category in the cell (MacDonald)"
sort gid
save gdmean_macdonald_shares, replace

*** MEAN DEPTH IN EACH CELL ***

use temp, clear
keep dtwafrica_ gid 
* USING MEDIAN VALUE EACH TIME *
* 0-7 => 3.5
* 7-25 => 16 (+12.5)
* 25-50 => 37.5 (+21.5)
* 50-100 => 75 (+37.5)
* 100-250 => 175 (+100)
* >250 => 300 (we arbitrarily add +125)
gen median = 3.5 if dtw == "VS" 
replace median = 16  if dtw == "S"
replace median = 37.5 if dtw == "SM"
replace median = 75 if dtw == "M"
replace median = 175 if dtw == "D"
replace median = 300 if dtw == "VD"
tab median, m
* USING MIN VALUE EACH TIME *
gen min = 3.5 if dtw == "VS" 
replace min = 7 if dtw == "S"
replace min = 25 if dtw == "SM"
replace min = 50 if dtw == "M"
replace min = 100 if dtw == "D"
replace min = 250 if dtw == "VD"
tab min, m
* USING MAX VALUE EACH TIME *
* 50-100 => 100
* 100-250 => 250 (+150)
* >250 => 400 (we arbitrarily add +200)
gen max = 7 if dtw == "VS" 
replace max = 25 if dtw == "S"
replace max = 50 if dtw == "SM"
replace max = 100 if dtw == "M"
replace max = 250 if dtw == "D"
replace max = 400 if dtw == "VD"
tab max, m
* NUMBER OF PIXELS *
gen count = 1
bysort gid: egen sumcount = sum(count)
codebook gid 
* 10,332
codebook gid if sumcount < 100
* 599
ren sumcount numpixels
* GINI INDEX *
egen gdgini_med = gini(median), by(gid)
egen gdgini_min= gini(min), by(gid)
egen gdgini_max = gini(max), by(gid)
collapse (mean) median min max (max) gdgini* numpixels, by(gid)
*twoway (scatter gini_med numpixels)(lfit gini_med numpixels)
*twoway (scatter gini_med numpixels if numpixels < 100)(lfit gini_med numpixels if numpixels < 100)
ren median gdmean_med
label var gdmean_med "Mean of the median groundwater depth (m) in the cell (MacDonald)"
ren min gdmean_min
label var gdmean_min "Mean of the min groundwater depth (m) in the cell (MacDonald)"
ren max gdmean_max
label var gdmean_max "Mean of the max groundwater depth (m) in the cell (MacDonald)"
label var gdgini_med "Gini of the median groundwater depth (m) in the cell (MacDonald)"
label var gdgini_min "Gini of the min groundwater depth (m) in the cell (MacDonald)"
label var gdgini_max "Gini of the max groundwater depth (m) in the cell (MacDonald)"
label var numpixels "Number of Macdonald pixels in the cell"
sort gid
save gdmean_macdonald_means, replace

corr gdmean*
corr gdgini*
foreach X in med min max {
corr gdmean_`X' gdgini_`X'
}

*** MEAN COST OF DEPTH IN EACH CELL ***
 
use temp, clear
keep dtwafrica_ gid 
tab dtwafrica_, m
* Assumptions of costs = 
* VS 0-7 - from Sekhri 2014, "The submersible pumps cost three times as much as centrifugal pumps"
* from Sekhri 2014, cost of submersible pump = 6,830 rupees = 83 USD today
* S 7-25 - from Sekhri 2014, cost of centrifugal pump = 16,750 ruppes = 204 USD today
* SM 25-50 - from Uhl et al 2009, "hand-operated pumps are the most common manual pumps and can function to well depths of up to 50 neters. Electric or diesel-powered pumps are the most common used for deep wells". cost = $450-500 = we use $475
* M 50-100 - random choice = $1000
* D 100-250 - from Uhl et al 2009, machine-drive rigs needed (from "$100,000 to over $1,000,000, depending on the capacity of the equipment)"
* VD > 250 - assume $250,000 based on Uhl et al's citation above
gen cost = 83 if dtw == "VS" 
replace cost = 204  if dtw == "S"
replace cost = 475 if dtw == "SM"
replace cost = 1000 if dtw == "M"
replace cost = 100000 if dtw == "D"
replace cost = 250000 if dtw == "VD"
tab cost, m
egen gdgini_cost = gini(cost), by(gid)
collapse (mean) cost (max) gdgini_*, by(gid)
ren cost gdmean_cost
label var gdmean_cost "Mean cost of accessing groundwater (dollars) in the cell (MacDonald)"
label var gdgini_cost "Mean cost of accessing groundwater (dollars) in the cell (MacDonald)"
corr gdmean_cost gdgini_cost
sort gid
save gdmean_macdonald_cost, replace

*** WE COMBINE THEM ***

use gdmean_macdonald_shares, clear
sort gid
merge gid using gdmean_macdonald_means
tab _m
drop _m
sort gid
merge gid using gdmean_macdonald_cost
tab _m
drop _m
* Correlations between these
corr gdmean_*
corr gdgini_*
* Quite high
save gdmean_macdonald_all, replace 

use gdmean_macdonald_all, clear
desc 
order gid numpixel gdshare* gdmean* gdgini* 
foreach X of varlist gdshare_0_7-gdgini_cost {
ren `X' `X'_md
}
sort gid
save gdmean_macdonald_all, replace 

use gdmean_macdonald_all, clear
desc
